This work aims to uncover spatial and temporal crime patterns in Chicago. Studying these patterns is essential: it advances the academic understanding of criminal activity and provides insight into criminal events, which in turn helps optimize police presence and improve public safety. This work focuses on investigating and visualizing crime patterns in Chicago at the community-area level. It asks two questions: how do spatial crime patterns differ across crime types, and how do they change across different time scales (e.g., day vs. night, month, year)? This notebook walks through the whole project, from collecting the data to generating the results.
# import required libraries
%matplotlib inline
import os
import fiona
import pprint
import IPython
from matplotlib import pyplot as plt
import pandas as pd
import geopandas as gpd
import folium
from folium.plugins import HeatMap, HeatMapWithTime
project/
│ Project Crimes in Chicago.ipynb
│
└───data/
│ │ Community.zip
│ │ chicago_pop.csv
│ │ rows.csv
│ │
└───result/
│ ...
We need three datasets for this project:
# Run this line at the first time to get chicago crime data
! wget -P ./data/ https://data.cityofchicago.org/api/views/ijzp-q8t2/rows.csv
# Read data
crimes = pd.read_csv('data/rows.csv')
community_geo = gpd.read_file(os.path.join(r'data/Community.zip'))[['area_numbe', 'community', 'geometry']]
community_pop = pd.read_csv('data/chicago_pop.csv').iloc[:,0:5]
crimes.head(3)
community_geo.head(3)
community_pop.head(3)
def cleandata(dataset):
    """Return a cleaned copy of the raw crime table with derived columns.

    Steps performed:
      1. keep only the date, year, crime type, community area and coordinate
         columns;
      2. drop every row that has a missing value in any of those columns;
      3. parse ``Date`` as datetime and cast ``Community Area`` to int;
      4. add ``Month`` (1-12) and ``DayNight`` ('Day' for hours 6-17,
         'Night' for hours 0-5 and 18-23);
      5. collapse ``Primary Type`` into 'Property', 'Safety' or 'Others';
         values not present in any of the lists below are left unchanged.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Raw crime records containing at least the columns listed in
        ``columns_tokeep``.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input frame is not modified.
    """
    # necessary information in the columns
    columns_tokeep = ['Date', 'Year', 'Primary Type', 'Community Area', 'X Coordinate',
                      'Y Coordinate', 'Latitude', 'Longitude']
    # columns that must not contain NaN values
    columns_dropnan = ['Date', 'Year', 'Primary Type', 'Community Area', 'X Coordinate',
                       'Y Coordinate', 'Latitude', 'Longitude']
    # Explicit copy: the column assignments below then operate on an
    # independent frame instead of a selection derived from the caller's data.
    dataset = dataset[columns_tokeep].dropna(how='any', subset=columns_dropnan).copy()

    # change dtype of columns
    dataset['Date'] = pd.to_datetime(dataset['Date'])
    dataset['Community Area'] = dataset['Community Area'].astype(int)

    # time analysis
    # 6 - 18 - Day
    # 0 - 6; 18 - 0 - Night
    dataset['Month'] = dataset['Date'].dt.month
    is_day = dataset['Date'].dt.hour.between(6, 17)  # inclusive on both ends
    dataset['DayNight'] = is_day.map({True: 'Day', False: 'Night'})

    # Primary Types re-category
    property_loss_list = ['THEFT', 'BURGLARY', 'MOTOR VEHICLE THEFT', 'DECEPTIVE PRACTICE']
    safety_list = ['BATTERY', 'WEAPONS VIOLATION', 'CRIMINAL DAMAGE', 'ASSAULT',
                   'ROBBERY', 'SEX OFFENSE', 'CRIM SEXUAL ASSAULT', 'ARSON',
                   'HOMICIDE', 'KIDNAPPING', 'CRIMINAL SEXUAL ASSAULT', 'INTIMIDATION',
                   'STALKING', 'CONCEALED CARRY LICENSE VIOLATION', 'PUBLIC INDECENCY',
                   'HUMAN TRAFFICKING', 'DOMESTIC VIOLENCE']
    others_list = ['NARCOTICS', 'OTHER OFFENSE', 'CRIMINAL TRESPASS', 'PROSTITUTION',
                   'OFFENSE INVOLVING CHILDREN', 'PUBLIC PEACE VIOLATION',
                   'INTERFERENCE WITH PUBLIC OFFICER', 'LIQUOR LAW VIOLATION',
                   'GAMBLING', 'OBSCENITY', 'NON-CRIMINAL', 'OTHER NARCOTIC VIOLATION',
                   'NON - CRIMINAL', 'RITUALISM', 'NON-CRIMINAL (SUBJECT SPECIFIED)']
    # One lookup table and a single pass over the column instead of three
    # whole-frame replace calls.
    category_of = {}
    category_of.update(dict.fromkeys(property_loss_list, 'Property'))
    category_of.update(dict.fromkeys(safety_list, 'Safety'))
    category_of.update(dict.fromkeys(others_list, 'Others'))
    dataset['Primary Type'] = dataset['Primary Type'].replace(category_of)
    return dataset
# Clean the raw records (author's note: 6882748 rows remained after cleaning).
crimes_clean = cleandata(crimes)
crimes_clean.head(3)
# Count the records in every
# (year, month, day/night, crime category, community area) combination.
group_keys = ['Year', 'Month', 'DayNight', 'Primary Type', 'Community Area']
crimes_count = crimes_clean.groupby(group_keys)['Date'].count().reset_index(name='Count')
crimes_count.head()
# Represent every community by a single point: the centroid of its geometry.
# Centroids computed directly on longitude/latitude are geometrically
# inaccurate, so when the layer uses a geographic CRS the geometries are first
# re-projected to a projected CRS (EPSG:3435, NAD83 / Illinois East, ftUS),
# the centroid is taken there, and the points are converted back to the
# original CRS so they still line up with the basemap.
_source_crs = community_geo.crs
if _source_crs is not None and _source_crs.is_geographic:
    _centroids = community_geo.geometry.to_crs(epsg=3435).centroid.to_crs(_source_crs)
else:
    # Already projected (or CRS unknown): take the centroid as-is.
    _centroids = community_geo.geometry.centroid
community_geo['Community Point'] = _centroids
community_geo['community_x'] = community_geo['Community Point'].x
community_geo['community_y'] = community_geo['Community Point'].y
# Align the key column name and dtype with the crime table so both can be merged.
community_geo = community_geo.rename(columns={'area_numbe': 'Community Area'})
community_geo['Community Area'] = community_geo['Community Area'].astype(int)
community_geo.head(3)
def CrimesHeatMapWithTime(crimes_count, community_geo, bytime, dayornight = None, crimetype = None):
    """Build an animated heat-map layer of crime counts per community.

    Parameters
    ----------
    crimes_count : DataFrame
        Aggregated counts with at least the columns ``bytime``,
        'Community Area', 'Count', and, when the matching filter is used,
        'DayNight' / 'Primary Type'.
    community_geo : DataFrame
        Must provide 'Community Area', 'community_x' and 'community_y'.
    bytime : str
        Animation step, either 'Year' or 'Month'.
    dayornight : str, optional
        Restrict to 'Day' or 'Night' records; ``None`` keeps both.
    crimetype : str, optional
        Restrict to 'Property', 'Safety' or 'Others'; ``None`` keeps all.

    Returns
    -------
    HeatMapWithTime, or the integer 0 when any argument value is not one of
    the accepted options (sentinel kept for backward compatibility).
    """
    # Validate the arguments before touching any data.
    if bytime not in ('Year', 'Month'):
        return 0
    if dayornight is not None:
        if dayornight not in ('Day', 'Night'):
            return 0
        crimes_count = crimes_count.loc[crimes_count['DayNight'] == dayornight]
    if crimetype is not None:
        if crimetype not in ('Property', 'Safety', 'Others'):
            return 0
        crimes_count = crimes_count.loc[crimes_count['Primary Type'] == crimetype]

    # Total count per (time step, community), joined with the community points.
    per_step = crimes_count.groupby([bytime, 'Community Area']).agg({'Count': 'sum'}).reset_index()
    per_step = per_step.merge(community_geo, on='Community Area')
    per_step = per_step.sort_values(by=[bytime, 'Community Area'], ascending=True)

    # Build labels and frames together from the merged table. The merge is an
    # inner join, so a time step whose communities all lack a match vanishes;
    # deriving the labels here keeps index[i] describing data[i].
    time_index = []
    data = []
    for step, rows in per_step.groupby(bytime):
        time_index.append(str(step))
        # One [lat, lon, weight] triple per community; weights come out as floats.
        data.append(rows[['community_y', 'community_x', 'Count']].values.tolist())

    heatmap_layer = HeatMapWithTime(data,
                                    index=time_index,
                                    auto_play=True,
                                    use_local_extrema=True)
    return heatmap_layer
# --- Crime counts per community, animated year by year ---
# NOTE(review): the 'stamentoner' basemap may not resolve in newer folium
# releases; 'cartodbpositron' is the alternative the author noted. Confirm.
heatmap = folium.Map(
    location=[41.87, -87.62],  # Chicago
    zoom_start=10.2,
    control_scale=True,
    tiles='stamentoner',
)
# Optional community-boundary overlay (disabled):
# folium.GeoJson(
# data=community_geo['geometry'],
# style_function = lambda x: {'fillOpacity' : 0, 'weight': 1, 'Opacity' : 0.1}
# ).add_to(heatmap)
yearly_layer = CrimesHeatMapWithTime(crimes_count, community_geo, 'Year')
yearly_layer.add_to(heatmap)
heatmap
# heatmap.save('result/year.html')

# --- Crime counts per community, animated month by month ---
heatmap = folium.Map(
    location=[41.87, -87.62],  # Chicago
    zoom_start=10.2,
    control_scale=True,
    tiles='stamentoner',
)
monthly_layer = CrimesHeatMapWithTime(crimes_count, community_geo, 'Month')
monthly_layer.add_to(heatmap)
# heatmap.save('result/month.html')
heatmap

# --- Side-by-side maps: day (left, m1) vs. night (right, m2), by year ---
daynightmap = folium.plugins.DualMap(location=[41.8, -87.62], tiles='stamentoner', zoom_start=10)
for period, panel in (('Day', daynightmap.m1), ('Night', daynightmap.m2)):
    CrimesHeatMapWithTime(crimes_count, community_geo, 'Year', dayornight=period).add_to(panel)
# daynightmap.save('result/daynight.html')
daynightmap

# --- Side-by-side maps: 'Safety' (left, m1) vs. 'Property' (right, m2), by year ---
crimetypemap = folium.plugins.DualMap(location=[41.8, -87.62], tiles='stamentoner', zoom_start=10)
for category, panel in (('Safety', crimetypemap.m1), ('Property', crimetypemap.m2)):
    CrimesHeatMapWithTime(crimes_count, community_geo, 'Year', crimetype=category).add_to(panel)
# crimetypemap.save('result/crimetype.html')
crimetypemap
def find_pop(community_pop, row):
    """Select the population column that applies to ``row`` for its community.

    The column is chosen by position, counted from the right-hand side of
    ``community_pop``: years 1996-2005 use the last column, 2006-2015 the
    second to last, 2016-2025 the third to last, and so on.
    (Presumably each of those columns holds one census count; confirm
    against the layout of the population file.)

    Parameters
    ----------
    community_pop : DataFrame
        Population table with a 'no' column holding the community number.
    row : mapping
        Must provide 'Year' and 'Community Area'.

    Returns
    -------
    Series
        The selected column restricted to the rows whose 'no' equals the
        community area (empty when no row matches).
    """
    decades_since_1996 = (row['Year'] - 1996) // 10
    column_from_right = decades_since_1996 + 1
    same_community = community_pop['no'] == row['Community Area']
    matching_rows = community_pop[same_community]
    return matching_rows.iloc[:, -column_from_right]
def CrimesDenseHeatMapWithTime(crimes_count, community_geo, bytime, dayornight = None, crimetype = None):
    """Build an animated heat-map layer of crime counts divided by population.

    Works like the plain count heat map, except that every community's count
    is divided by the population returned by ``find_pop`` for that row.
    The population table is read from the module-level ``community_pop``.

    Parameters
    ----------
    crimes_count : DataFrame
        Aggregated counts with at least the columns ``bytime``,
        'Community Area', 'Count', and, when the matching filter is used,
        'DayNight' / 'Primary Type'.
    community_geo : DataFrame
        Must provide 'Community Area', 'community_x' and 'community_y'.
    bytime : str
        Animation step, either 'Year' or 'Month'.
    dayornight : str, optional
        Restrict to 'Day' or 'Night' records; ``None`` keeps both.
    crimetype : str, optional
        Restrict to 'Property', 'Safety' or 'Others'; ``None`` keeps all.

    Returns
    -------
    HeatMapWithTime, or the integer 0 when any argument value is not one of
    the accepted options (sentinel kept for backward compatibility).

    NOTE(review): ``find_pop`` reads ``row['Year']``, but with
    ``bytime='Month'`` the aggregated table has no 'Year' column unless
    ``community_geo`` supplies one, so that mode looks like it fails with a
    KeyError. Confirm the intended population source for monthly maps.
    """
    # Validate the arguments before touching any data.
    if bytime not in ('Year', 'Month'):
        return 0
    if dayornight is not None:
        if dayornight not in ('Day', 'Night'):
            return 0
        crimes_count = crimes_count.loc[crimes_count['DayNight'] == dayornight]
    if crimetype is not None:
        if crimetype not in ('Property', 'Safety', 'Others'):
            return 0
        crimes_count = crimes_count.loc[crimes_count['Primary Type'] == crimetype]

    # Total count per (time step, community), joined with the community points.
    crimes_count_bytime = crimes_count.groupby([bytime, 'Community Area']).agg({'Count': 'sum'}).reset_index()
    crimes_count_bytime = crimes_count_bytime.merge(community_geo, on='Community Area')
    crimes_count_bytime = crimes_count_bytime.sort_values(by=[bytime, 'Community Area'], ascending=True)

    # divided by population
    # find_pop returns a Series per row, so apply() yields a frame that is
    # NaN everywhere except the matched entries; the row-wise sum collapses
    # it to one population value per row.
    crimes_count_bytime['pop'] = crimes_count_bytime.apply(lambda row: find_pop(community_pop, row), axis=1).sum(axis=1)
    crimes_count_bytime['count_pop'] = crimes_count_bytime['Count'] / crimes_count_bytime['pop']

    # Build labels and frames together from the merged table. The merge is an
    # inner join, so a time step whose communities all lack a match vanishes;
    # deriving the labels here keeps index[i] describing data[i].
    time_index = []
    data = []
    for step, rows in crimes_count_bytime.groupby(bytime):
        time_index.append(str(step))
        # One [lat, lon, weight] triple per community.
        data.append(rows[['community_y', 'community_x', 'count_pop']].values.tolist())

    heatmap_layer = HeatMapWithTime(data,
                                    index=time_index,
                                    auto_play=True,
                                    use_local_extrema=True)
    return heatmap_layer
# --- Crimes per inhabitant and community, animated year by year ---
# NOTE(review): the 'stamentoner' basemap may not resolve in newer folium
# releases; 'cartodbpositron' is the alternative the author noted. Confirm.
heatmap = folium.Map(
    location=[41.87, -87.62],  # Chicago
    zoom_start=10.2,
    control_scale=True,
    tiles='stamentoner',
)
density_layer = CrimesDenseHeatMapWithTime(crimes_count, community_geo, 'Year')
density_layer.add_to(heatmap)
# heatmap.save('result/yearpop.html')
heatmap

# --- Side-by-side maps: raw counts (left, m1) vs. population-normalised (right, m2) ---
popmap = folium.plugins.DualMap(location=[41.8, -87.62], tiles='stamentoner', zoom_start=10)
raw_count_layer = CrimesHeatMapWithTime(crimes_count, community_geo, 'Year')
raw_count_layer.add_to(popmap.m1)
normalised_layer = CrimesDenseHeatMapWithTime(crimes_count, community_geo, 'Year')
normalised_layer.add_to(popmap.m2)
# popmap.save('result/yearpopcom.html')
popmap

# --- Side-by-side normalised maps: 'Safety' (left, m1) vs. 'Property' (right, m2) ---
popcrimetypemap = folium.plugins.DualMap(location=[41.8, -87.62], tiles='stamentoner', zoom_start=10)
for category, panel in (('Safety', popcrimetypemap.m1), ('Property', popcrimetypemap.m2)):
    CrimesDenseHeatMapWithTime(crimes_count, community_geo, 'Year', crimetype=category).add_to(panel)
# popcrimetypemap.save('result/crimetypepop.html')
popcrimetypemap